﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using org.pdfbox.pdmodel;
using org.pdfbox.util;
using org.pdfbox.cos;
using org.pdfbox.pdmodel.graphics.xobject;
using org.pdfbox.pdmodel.interactive.documentnavigation.outline;
using System.IO;
using iTextSharp.text.pdf;
using Spire.Pdf;
using Spire.Pdf.Graphics;
using System.Data;
using System.Drawing;


namespace PDFParser
{
    /// <summary>
    /// Reads the text of PDF pages with iTextSharp and converts each page into one
    /// row of a <see cref="DataTable"/> by picking fixed line positions out of the
    /// extracted text. Failures are appended to a per-day log file.
    /// </summary>
    public class PDFParser
    {
        /// <summary>
        /// Device name; used by <see cref="WriteToLog"/> as the folder name under
        /// <c>D:\</c> that holds the <c>log</c> directory.
        /// </summary>
        public static string Devicename = "";

        // Column names of the result table, in the exact order page_data fills them.
        private static readonly string[] ColumnNames =
        {
            "page", "StyleProgram", "size", "number", "po", "style", "qty", "NO1", "NO2",
            "shipfrom1", "shipfrom2", "shipfrom3", "shipfrom4",
            "shipto1", "shipto2", "shipto3", "shipto4",
            "number1"
        };

        // page_data reads text lines up to index 19, so a page must supply at least 20.
        private const int RequiredLineCount = 20;

        /// <summary>
        /// Creates a parser and stores the device name in the static
        /// <see cref="Devicename"/> field (shared by all instances and static calls).
        /// </summary>
        /// <param name="Devicename_">Device name used to build the log folder path.</param>
        public PDFParser(string Devicename_)
        {
            Devicename = Devicename_;
        }

        #region Read the text of a PDF (Chinese characters are supported)
        /// <summary>
        /// Extracts the text of one page, or of every page, of a PDF and returns one
        /// table row per processed page.
        /// </summary>
        /// <param name="filepath">Path of the PDF file to read.</param>
        /// <param name="page">
        /// 1-based page number to process, or 0 to process every page. A value that
        /// matches no page yields an empty table.
        /// </param>
        /// <returns>A table with the columns listed in <c>ColumnNames</c>.</returns>
        /// <exception cref="Exception">
        /// Any failure is logged through <see cref="WriteToLog"/> and rethrown as an
        /// <see cref="Exception"/> whose message is prefixed with "提示：" and whose
        /// <see cref="Exception.InnerException"/> is the original error.
        /// </exception>
        public static DataTable OnCreated(string filepath, int page)
        {
            PdfReader pdfReader = null;
            try
            {
                pdfReader = new PdfReader(filepath);
                int numberOfPages = pdfReader.NumberOfPages;

                DataTable M_dt = new DataTable();
                foreach (string columnName in ColumnNames)
                {
                    M_dt.Columns.Add(columnName);
                }

                for (int i = 1; i <= numberOfPages; ++i) // visit every page
                {
                    if (page != 0 && i != page)
                    {
                        continue; // a specific page was requested and this is not it
                    }

                    // A new strategy object is created for every page, as in the original code.
                    iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                    string text = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy);

                    // Turn line breaks into '*' separators. "\r\n" must be handled before
                    // "\n", otherwise the '\r' would be left behind inside the data.
                    text = text.Replace("\r\n", "*").Replace("\n", "*");

                    // Collapse lines that consist of a single blank.
                    while (text.Contains("* *"))
                    {
                        text = text.Replace("* *", "*");
                    }

                    int separatorCount = text.Count(ch => ch == '*');
                    string[] lines = string_char(text, separatorCount); // split the page into its lines
                    page_data(lines, i, M_dt);                          // append the page as one table row

                    // Keep only the digits of the style value. Only the row that was just
                    // added needs this; earlier rows were cleaned when they were added.
                    DataRow added = M_dt.Rows[M_dt.Rows.Count - 1];
                    string style_v = added["style"].ToString().Trim();
                    added["style"] = System.Text.RegularExpressions.Regex.Replace(style_v, @"[^0-9]+", "");
                }

                return M_dt;
            }
            catch (Exception ex)
            {
                WriteToLog(ex);
                // Keep the original exception as InnerException so the stack trace is not lost.
                throw new Exception("提示：" + ex.Message, ex);
            }
            finally
            {
                // Release the reader on the failure path as well as on success.
                if (pdfReader != null)
                {
                    pdfReader.Close();
                }
            }
        }
        #endregion

        #region Split the text of one page into its individual lines
        /// <summary>
        /// Splits a '*'-separated string into its segments. A single trailing '*' is
        /// ignored before splitting.
        /// </summary>
        /// <param name="str">The '*'-separated text of one page.</param>
        /// <param name="len">
        /// Number of '*' separators the caller counted in <paramref name="str"/>. The
        /// result has at least <c>len + 1</c> elements; it is larger when the text
        /// actually contains more segments than that.
        /// </param>
        /// <returns>
        /// The segments in order. Empty segments and unused trailing slots are empty
        /// strings, never <c>null</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static string[] string_char(string str, int len)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            if (str.EndsWith("*", StringComparison.Ordinal))
            {
                str = str.Substring(0, str.Length - 1);
            }

            string[] segments = str.Split('*');

            // Honour the caller-supplied size, but never drop segments if it is too small.
            string[] str_arr = new string[Math.Max(len + 1, segments.Length)];
            for (int i = 0; i < str_arr.Length; i++)
            {
                str_arr[i] = i < segments.Length ? segments[i] : string.Empty;
            }

            return str_arr;
        }
        #endregion

        #region Store the lines of one page as a new DataTable row
        /// <summary>
        /// Builds one row from the text lines of a page and appends it to
        /// <paramref name="M_dt"/>. Lines 16 to 19 each hold a ship-to text followed by
        /// a fixed-width ship-from text (22, 10, 33 and 33 characters respectively).
        /// </summary>
        /// <param name="str">Text lines of the page; at least 20 elements are required.</param>
        /// <param name="page">Page number stored in the first column.</param>
        /// <param name="M_dt">Target table; must have the 18 columns created by <see cref="OnCreated"/>.</param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="str"/> has fewer than 20 elements, or one of lines 16 to 19 is
        /// shorter than its fixed ship-from width.
        /// </exception>
        public static void page_data(string[] str, int page, DataTable M_dt)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            if (M_dt == null)
            {
                throw new ArgumentNullException("M_dt");
            }
            if (str.Length < RequiredLineCount)
            {
                throw new ArgumentException(
                    "Expected at least " + RequiredLineCount + " text lines on page " + page + " but found " + str.Length + ".",
                    "str");
            }

            string shipfrom1, shipfrom2, shipfrom3, shipfrom4;
            string shipto1, shipto2, shipto3, shipto4;
            SplitShipLine(str, 19, 33, out shipfrom1, out shipto1);
            SplitShipLine(str, 18, 33, out shipfrom2, out shipto2);
            SplitShipLine(str, 16, 22, out shipfrom3, out shipto3);
            SplitShipLine(str, 17, 10, out shipfrom4, out shipto4);

            DataRow dr = M_dt.NewRow();
            object[] objs = { page, str[1], str[2], str[3], str[6], str[7], str[12], str[4], str[10], shipfrom1, shipfrom2, shipfrom3, shipfrom4, shipto1, shipto2, shipto3, shipto4, str[15] };
            dr.ItemArray = objs;
            M_dt.Rows.Add(dr);
        }

        // Splits one trimmed line into its fixed-width ship-from suffix and the ship-to
        // text in front of it.
        private static void SplitShipLine(string[] lines, int index, int shipFromWidth, out string shipFrom, out string shipTo)
        {
            string line = (lines[index] ?? string.Empty).Trim();
            if (line.Length < shipFromWidth)
            {
                throw new ArgumentOutOfRangeException(
                    "str",
                    "Text line " + index + " has " + line.Length + " characters; at least " + shipFromWidth + " are required.");
            }

            int splitAt = line.Length - shipFromWidth;
            shipFrom = line.Substring(splitAt);
            // Take the prefix instead of Replace(shipFrom, ""): Replace would also delete
            // an identical text occurring earlier in the line.
            shipTo = line.Substring(0, splitAt).Trim();
        }
        #endregion

        /// <summary>
        /// Appends an error record to <c>D:\{Devicename}\log\{yyyy-MM-dd}.txt</c>,
        /// creating the folder when needed. Logging is best-effort: if the record
        /// cannot be written, the failure is reported through
        /// <see cref="System.Diagnostics.Trace"/> and no exception is thrown, so a
        /// broken log target can neither hide the error being logged nor recurse.
        /// </summary>
        /// <param name="err">The exception to record; null is ignored.</param>
        public static void WriteToLog(Exception err)
        {
            if (err == null)
            {
                return;
            }

            try
            {
                string logDirectory = @"D:\" + Devicename + @"\log";
                string path = logDirectory + @"\" + DateTime.Now.ToString("yyyy-MM-dd") + ".txt"; // log file path

                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(logDirectory);

                // FileMode.Append positions the stream at the end; disposing the writer flushes it.
                using (FileStream fs = new FileStream(path, FileMode.Append, FileAccess.Write))
                {
                    using (StreamWriter sw = new StreamWriter(fs))
                    {
                        sw.WriteLine("报错时间:" + DateTime.Now.ToString("yyyy-MM-dd HH：mm：ss"));
                        sw.WriteLine("出错文件：" + "原因：" + err.ToString());
                    }
                }
            }
            catch (Exception logFailure)
            {
                // Do not call WriteToLog again here: if the log target is unusable that
                // would recurse until the stack overflows.
                System.Diagnostics.Trace.TraceError(
                    "PDFParser.WriteToLog failed: " + logFailure + Environment.NewLine + "Original error: " + err);
            }
        }
    }
}
